; void __fastcall__ decompress_lzsa1(const void *src, void *dest)
;
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA1 format.
;
; Compress with:
; lzsa -r -f 1 input.bin output.lzsa1
;
; Copyright John Brandwood 2021.
;
; Distributed under the Boost Software License, Version 1.0.
; Boost Software License - Version 1.0 - August 17th, 2003
;
; Permission is hereby granted, free of charge, to any person or organization
; obtaining a copy of the software and accompanying documentation covered by
; this license (the "Software") to use, reproduce, display, distribute,
; execute, and transmit the Software, and to prepare derivative works of the
; Software, and to permit third-parties to whom the Software is furnished to
; do so, all subject to the following:
;
; The copyright notices in the Software and this entire statement, including
; the above license grant, this restriction and the following disclaimer,
; must be included in all copies of the Software, in whole or in part, and
; all derivative works of the Software, unless such copies or derivative
; works are solely in the form of machine-executable object code generated by
; a source language processor.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
; DEALINGS IN THE SOFTWARE.

        .export         _decompress_lzsa1

        .import         popax
        .importzp       ptr1, ptr2, ptr3, ptr4, tmp1, tmp2, tmp3

lzsa_cmdbuf     =       tmp1                    ; 1 byte.
lzsa_winptr     =       ptr1                    ; 1 word.
lzsa_srcptr     =       ptr2                    ; 1 word.
lzsa_dstptr     =       ptr3                    ; 1 word.

lzsa_offset     =       lzsa_winptr

.proc _decompress_lzsa1

                ;
                ; Entry point, called from C as a __fastcall__ function.
                ;
                ; In:    A/X   = dest (lo/hi).
                ;        stack = src, fetched here with popax.
                ; Out:   nothing; returns from "finished" when the stream's
                ;        end marker (16-bit match length with hi-byte 0) is
                ;        read.
                ; Uses:  A, X, Y, lzsa_cmdbuf, lzsa_npages, lzsa_srcptr,
                ;        lzsa_dstptr and lzsa_winptr/lzsa_offset.
                ;
                ; The count of full pages left in a long literal run is kept
                ; in zero page instead of being patched into an instruction
                ; operand, so this routine never writes to its own code and
                ; also works when the code segment is placed in ROM.
                ;

lzsa_npages     =       tmp2                    ; 1 byte.

                sta     lzsa_dstptr             ; dest arrives in A/X.
                stx     lzsa_dstptr+1
                jsr     popax                   ; src comes off the C stack.
                sta     lzsa_srcptr
                stx     lzsa_srcptr+1

lzsa1_unpack:   ldy     #0                      ; Initialize source index.
                ldx     #0                      ; Initialize hi-byte of length.
                stx     lzsa_npages             ; No literal pages pending.

                ;
                ; Copy bytes from compressed source data.
                ;
                ; N.B. X=0 and Y=0 are expected and guaranteed when we get
                ; here, and lzsa_npages is 0.
                ;
                ; The command byte is laid out as:
                ;   bit 7    : set if the match offset has a hi-byte.
                ;   bits 6-4 : literal length, 7 means an extra byte follows.
                ;   bits 3-0 : match length - 3, 15 means an extra byte follows.
                ;

cp_length:      lda     (lzsa_srcptr),y         ; Fetch the command byte.
                inc     lzsa_srcptr
                bne     cp_skip0
                inc     lzsa_srcptr+1

cp_skip0:       sta     lzsa_cmdbuf             ; Preserve this for later.
                and     #$70                    ; Extract literal length.
                lsr                             ; Set CC before ...
                beq     lz_offset               ; Skip directly to match?

                lsr                             ; Get 3-bit literal length.
                lsr
                lsr
                cmp     #$07                    ; Extended length?
                bcc     cp_got_len

                jsr     get_length              ; X=0, CS from CMP, returns CC.
                stx     lzsa_npages             ; Hi-byte of length.

cp_got_len:     tax                             ; Lo-byte of length.

                ;
                ; Copy X bytes (256 if X=0), then another 256 for each page
                ; counted in lzsa_npages. Nothing in this loop touches the
                ; carry, so it is still clear at lz_offset.
                ;

cp_byte:        lda     (lzsa_srcptr),y         ; CC throughout the execution
                sta     (lzsa_dstptr),y         ; of this cp_byte loop.
                inc     lzsa_srcptr
                bne     cp_skip1
                inc     lzsa_srcptr+1
cp_skip1:       inc     lzsa_dstptr
                bne     cp_skip2
                inc     lzsa_dstptr+1
cp_skip2:       dex
                bne     cp_byte
cp_npages:      lda     lzsa_npages             ; Any full pages left to copy?
                beq     lz_offset

                dec     lzsa_npages             ; Unlikely, so can be slow.
                bcc     cp_byte                 ; Always true!

                ;
                ; Copy bytes from decompressed window.
                ;
                ; Longer but faster.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;

lz_offset:      lda     (lzsa_srcptr),y         ; Get offset-lo.
                inc     lzsa_srcptr
                bne     offset_lo
                inc     lzsa_srcptr+1

offset_lo:      sta     lzsa_offset

                lda     #$FF                    ; Get offset-hi.
                bit     lzsa_cmdbuf             ; N = bit 7 of the command.
                bpl     offset_hi               ; Clear: hi-byte is $FF.

                lda     (lzsa_srcptr),y         ; Set: hi-byte is in the stream.
                inc     lzsa_srcptr
                bne     offset_hi
                inc     lzsa_srcptr+1

offset_hi:      sta     lzsa_offset+1

lz_length:      lda     lzsa_cmdbuf             ; X=0 from previous loop.
                and     #$0F
                adc     #$03                    ; Always CC from cp_byte loop.
                cmp     #$12                    ; Extended length?
                bcc     got_lz_len

                jsr     get_length              ; X=0, CS from CMP, returns CC.

got_lz_len:     inx                             ; Hi-byte of length+256.

                eor     #$FF                    ; Negate the lo-byte of length
                tay                             ; (Y = ~lo, becomes -lo at INY).
                eor     #$FF                    ; Restore A = lo-byte of length.

                ;
                ; Bias lzsa_dstptr so that lzsa_dstptr+Y is the current
                ; output position while Y counts up from -lo to 0.
                ;

get_lz_dst:     adc     lzsa_dstptr             ; Calc address of partial page.
                sta     lzsa_dstptr             ; Always CC from previous CMP.
                iny                             ; Y = -lo; C is left untouched.
                bcs     get_lz_win              ; Carry out replaces the -256.
                beq     get_lz_win              ; Is lo-byte of length zero?
                dec     lzsa_dstptr+1

get_lz_win:     clc                             ; Calc address of match.
                adc     lzsa_offset             ; N.B. Offset is negative!
                sta     lzsa_winptr             ; (A is still lzsa_dstptr lo.)
                lda     lzsa_dstptr+1
                adc     lzsa_offset+1
                sta     lzsa_winptr+1

lz_byte:        lda     (lzsa_winptr),y
                sta     (lzsa_dstptr),y
                iny
                bne     lz_byte                 ; Until Y wraps round to 0.
                inc     lzsa_dstptr+1           ; Step on to the next page.
                dex                             ; Any full pages left to copy?
                bne     lz_more

                jmp     cp_length               ; Loop around to the beginning.

lz_more:        inc     lzsa_winptr+1           ; Unlikely, so can be slow.
                bne     lz_byte                 ; Always true!

                ;
                ; Get 16-bit length in X:A register pair, return with CC.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;
                ; In:   A = base length ($07 or $12), Y = 0.
                ; Out:  A = lo-byte of length, X = hi-byte of length, less
                ;       one if the lo-byte is zero (the copy loops treat a
                ;       zero lo-byte as a full page).
                ;
                ; base + next byte < 256 : that sum is the length.
                ; base + next byte = 257 : one more byte follows, which is
                ;                          the lo-byte of a length with
                ;                          hi-byte 1.
                ; base + next byte = 256 : a lo/hi word follows; a hi-byte
                ;                          of zero marks the end of the data.
                ;

get_length:     clc                             ; Add on the next byte to get
                adc     (lzsa_srcptr),y         ; the length.
                inc     lzsa_srcptr             ; INC leaves the carry alone.
                bne     skip_inc
                inc     lzsa_srcptr+1

skip_inc:       bcc     got_length              ; No overflow means done.
                clc                             ; MUST return CC!
                tax                             ; Preserve overflow value.

extra_byte:     jsr     get_byte                ; So rare, this can be slow!
                pha                             ; Save length-lo.
                txa                             ; Overflow to 256 or 257?
                beq     extra_word

check_length:   pla                             ; Length-lo.
                bne     got_length              ; Check for zero.
                dex                             ; Do one less page loop if so.
got_length:     rts

extra_word:     jsr     get_byte                ; So rare, this can be slow!
                tax                             ; Length-hi.
                bne     check_length            ; Length-hi == 0 at EOF.

finished:       pla                             ; Length-lo.
                pla                             ; Decompression completed, pop
                pla                             ; return address.
                rts                             ; Back to our own caller.

                ;
                ; Read the next source byte into A and advance lzsa_srcptr.
                ; Expects Y=0; leaves X and the carry unchanged.
                ;

get_byte:       lda     (lzsa_srcptr),y         ; Subroutine version for when
                inc     lzsa_srcptr             ; inlining isn't advantageous.
                bne     got_byte
                inc     lzsa_srcptr+1           ; Carry into the hi-byte.
got_byte:       rts
.endproc
